In [1]:
import matplotlib.pyplot as plt
import numpy as np
import h5py
import seaborn as sns
import pandas as pd
import scipy.stats as stats
import matplotlib as mpl
from scipy.signal import hilbert
import statsmodels.api as sm

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.cluster import OPTICS
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder,StandardScaler, MinMaxScaler

from imblearn.over_sampling import SVMSMOTE
from sklearn.svm import SVC
import tensorflow.keras.layers as layers
from tensorflow.keras import Model
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score, precision_score, recall_score,  f1_score
from tensorflow.keras import callbacks
from tensorflow.keras.models import load_model
from imblearn import under_sampling
from sklearn.utils import class_weight
from sklearn.metrics import confusion_matrix
from keras.utils import to_categorical
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go


%matplotlib inline
Using TensorFlow backend.

Plot settings

In [2]:
plt.rcParams['axes.labelsize'] = 20
plt.rcParams['xtick.labelsize'] = 15
plt.rcParams['ytick.labelsize'] = 15
plt.rcParams['legend.fontsize'] = 20
plt.rcParams['figure.titlesize'] = 26
plt.rcParams['xtick.major.size'] = 10
plt.rcParams['xtick.major.width'] = 1
plt.rcParams['ytick.major.size'] = 10
plt.rcParams['ytick.major.width'] = 1
plt.rcParams['xtick.minor.width'] = 1
plt.rcParams['ytick.minor.size'] = 5
plt.rcParams['ytick.minor.width'] = 1
plt.rcParams['xtick.minor.size'] = 5
plt.rcParams['figure.figsize'] = 10,7
sns.set_style('ticks')
mpl.style.use('fivethirtyeight')

Given script

In [3]:
# Sample frequency
f_sample = 12e3

# Axis rotation frequency
f_rotation = 28.68
bpfo_multiplier = 3.5848
f_bpfo = f_rotation * bpfo_multiplier
f_bpfo_harmonics = f_bpfo*(1+np.arange(20))

print("Expected BPFO first frequency " + str(f_bpfo) + " Hz")
print("Expected BPFO harmonic frequencies " + str(f_bpfo_harmonics) + " Hz")

hf = h5py.File("x_baseline.h5", "r")
x_baseline = np.array(hf.get("x_baseline"))

hf = h5py.File("x_fault.h5", "r")
x_fault = np.array(hf.get("x_fault"))
Expected BPFO first frequency 102.81206399999999 Hz
Expected BPFO harmonic frequencies [ 102.812064  205.624128  308.436192  411.248256  514.06032   616.872384
  719.684448  822.496512  925.308576 1028.12064  1130.932704 1233.744768
 1336.556832 1439.368896 1542.18096  1644.993024 1747.805088 1850.617152
 1953.429216 2056.24128 ] Hz
  • Goal 1.What is the difference s between a good bearing and a faulty one? Also identify the BPFO.

Exploratory data analysis

In [4]:
print(f' Mean Baseline : {x_baseline.ravel().mean().round(3)}')
print(f' STD Baseline : {x_baseline.ravel().std().round(3)}')
 Mean Baseline : -0.0
 STD Baseline : 1.0
In [5]:
print(f' Mean Faulty : {x_fault.ravel().mean().round(3)}')
print(f' STD Faulty : {x_fault.ravel().std().round(3)}')
 Mean Faulty : 0.0
 STD Faulty : 1.001
  • Data is standarized to begin with.
In [6]:
fig,ax = plt.subplots(figsize=(16,28))
fig.subplots_adjust(hspace=0.5)


plt.subplot(5,2,1)
sns.distplot(x_baseline.ravel(), label='Baseline')
sns.distplot(x_fault.ravel(),label='Faulty')
plt.xlim([-5,5])
plt.ylabel('Population')
plt.title('Distribution of Complete Data',size=20)
plt.annotate(f'Skewness(base) : {stats.skew(x_baseline.ravel()):.2f}', (-4.5,1), size=13)
plt.annotate(f'Kurtosis(base) : {stats.kurtosis(x_baseline.ravel()):.2f}', (-4.5,0.9), size=13)
plt.annotate(f'Skewness(faulty) : {stats.skew(x_fault.ravel()):.2f}', (-4.5,0.8), size=13)
plt.annotate(f'Kurtosis(faulty) : {stats.kurtosis(x_fault.ravel()):.2f}', (-4.5,0.7), size=13)
plt.legend(frameon=False, loc=1)

plt.subplot(5,2,2)
sns.distplot(x_baseline[0], label='Baseline')
sns.distplot(x_fault[0],label='Faulty')
plt.ylabel('Population')
plt.xlim([-5,5])
plt.title('Distribution in the first 1 second',size=20)
plt.annotate(f'Skewness(base) : {stats.skew(x_baseline[0]):.2f}', (-4.5,1), size=13)
plt.annotate(f'Kurtosis(base) : {stats.kurtosis(x_baseline[0]):.2f}', (-4.5,0.9), size=13)
plt.annotate(f'Skewness(faulty) : {stats.skew(x_fault[0]):.2f}', (-4.5,0.8), size=13)
plt.annotate(f'Kurtosis(faulty) : {stats.kurtosis(x_fault[0]):.2f}', (-4.5,0.7), size=13)
plt.legend(frameon=False, loc=1)


plt.subplot(5,2,3)
sns.distplot(x_baseline[0][0:5000], label='Baseline')
sns.distplot(x_fault[0][0:5000],label='Faulty')
plt.xlim([-5,5])
plt.ylabel('Population')
plt.title('Distribution of first 5000 samples',size=20)
plt.annotate(f'Skewness(base) : {stats.skew(x_baseline[0][0:5000]):.2f}', (-4.5,1), size=13)
plt.annotate(f'Kurtosis(base) : {stats.kurtosis(x_baseline[0][0:5000]):.2f}', (-4.5,0.9), size=13)
plt.annotate(f'Skewness(faulty) : {stats.skew(x_fault[0][0:5000]):.2f}', (-4.5,0.8), size=13)
plt.annotate(f'Kurtosis(faulty) : {stats.kurtosis(x_fault[0][0:5000]):.2f}', (-4.5,0.7), size=13)
plt.legend(frameon=False, loc=1)

plt.subplot(5,2,4)
sns.distplot(x_baseline[0][0:1000], label='Baseline')
sns.distplot(x_fault[0][0:1000],label='Faulty')
plt.ylabel('Population')
plt.xlim([-5,5])
plt.title('Distribution of first 1000 samples',size=20)
plt.annotate(f'Skewness(base) : {stats.skew(x_baseline[0][0:1000]):.2f}', (-4.5,1), size=13)
plt.annotate(f'Kurtosis(base) : {stats.kurtosis(x_baseline[0][0:1000]):.2f}', (-4.5,0.9), size=13)
plt.annotate(f'Skewness(faulty) : {stats.skew(x_fault[0][0:1000]):.2f}', (-4.5,0.8), size=13)
plt.annotate(f'Kurtosis(faulty) : {stats.kurtosis(x_fault[0][0:1000]):.2f}', (-4.5,0.7), size=13)
plt.legend(frameon=False, loc=1)


plt.subplot(5,2,5)
time =  np.arange(0,40,1/f_sample)
N = len(time)
plt.ylim([-20,20])
plt.title('Complete signal data',size=20)
plt.plot(time, x_baseline.ravel(),label='Baseline')
plt.plot(time[0:int(N/4)], x_fault.ravel(),label='Fault')
plt.xlabel('Time')
plt.legend(frameon=False, loc=1)


plt.subplot(5,2,6)
plt.title('Signal data for the first 1 second',size=20)
plt.plot(time[0:int(N/40)], x_baseline[0],label='Baseline')
plt.plot(time[0:int(N/40)], x_fault[0],label='Fault')
plt.ylim([-20,20])
plt.xlabel('Time(s)')
plt.legend(frameon=False, loc=1)


plt.subplot(5,2,7)
plt.title('Signal for first 5000 samples',size=20)
plt.plot(time[0:5000], x_baseline[0][0:5000],label='Baseline')
plt.plot(time[0:5000], x_fault[0][0:5000],label='Fault')
plt.ylim([-16,16])
plt.xlabel('Time(s)')
plt.legend(frameon=False, loc=1)


plt.subplot(5,2,8)
plt.title('Signal for  first 1000 samples',size=20)
plt.plot(time[0:1000], x_baseline[0][0:1000],label='Baseline')
plt.plot(time[0:1000], x_fault[0][0:1000],label='Fault')
plt.ylim([-16,16])
plt.xlabel('Time(s)')
plt.legend(frameon=False, loc=1)


plt.subplot(5,2,9)
plt.title('Signal for first 500 samples',size=20)
plt.plot(time[0:500], x_baseline[0][0:500],label='Baseline')
plt.plot(time[0:500], x_fault[0][0:500],label='Fault')
plt.ylim([-16,16])
plt.xlabel('Time(s)')
plt.legend(frameon=False, loc=2)


plt.subplot(5,2,10)
plt.title('Signal for first 100 samples',size=20)
plt.plot(time[0:100], x_baseline[0][0:100],label='Baseline')
plt.plot(time[0:100], x_fault[0][0:100],label='Fault')
plt.ylim([-10,10])
plt.xlabel('Time(s)')
plt.legend(frameon=False, loc=1);
  • The distribution of baseline and faulty data is different in the time domain.
  • The distribution remains identical even for 1000 samples.
  • Statistically, both of the distributions are not skewed. However, faulty one has a very high kurtosis (more outliers) (i.e. not close to gaussian distribution).
In [7]:
plt.subplots(figsize=(15,7))

plt.subplot(1,2,1)
stats.probplot(x_baseline.ravel(), dist="norm",plot=plt)
plt.title('Baseline',size=25);

plt.subplot(1,2,2)
stats.probplot(x_fault.ravel(),dist="norm", plot=plt)
plt.title('Faulty',size=25);
  • Baseline data is closer to normal distribution but Faulty data is not.
In [8]:
df=pd.concat([pd.Series(x_baseline.ravel()),pd.Series(x_fault.ravel())],axis=1)
df.columns = ['Baseline','Faulty']
plt.figure(figsize=(7,15))
plt.xticks(size=20)
plt.yticks(size=20)
sns.boxplot(data=df);
  • Another visual conformation that there are several outliers in the faulty case.

FFT

In [9]:
fig = make_subplots(rows=3, cols=1, subplot_titles=['Baseline', 'Faulty', 'Faulty'], shared_yaxes=True)

freqs = np.fft.fftfreq(len(x_baseline[0]), 1/f_sample)
idx = np.argsort(freqs)
ps = ((2/x_baseline[0].size)*np.abs(np.fft.fft(x_baseline[0])))


fig.add_trace(go.Scatter(x=np.abs(freqs[idx]), y=ps[idx], name = 'Baseline'),
              row=1, col=1)


freqs = np.fft.fftfreq(len(x_fault[0]), 1/f_sample)
idx = np.argsort(freqs)
ps = ((2/x_fault[0].size)*np.abs(np.fft.fft(x_fault[0])))


fig.add_trace(go.Scatter(x=np.abs(freqs[idx]), y=ps[idx], name = 'Faulty'), 
              row=2, col=1)

fig.add_trace(go.Scatter(x=np.abs(freqs[idx]/f_bpfo), y=ps[idx], name = 'Faulty'), 
              row=3, col=1)

fig.update_xaxes(title_text="Frequency (Hz)", row=1, col=1)
fig.update_xaxes(title_text="Frequency (Hz)",  row=2, col=1)
fig.update_xaxes(title_text="F_bpfo Multiples", row=3, col=1)
fig.update_yaxes(title_text="Amplitude", row=1, col=1)
fig.update_yaxes(title_text="Amplitude", row=2, col=1)
fig.update_yaxes(title_text="Amplitude", row=3, col=1)
fig.update_layout(title_text="Vibration Spectrum : Baseline vs Faulty", height=700)

fig.show()
  • Additional regions appear in the frequency domain in the case of faulty data (frequency range > 2000 or F_bpfo X 20).

Envelope signal

  • Amplitude Envelope signal has been estimated through hilbert transform, which removes the negative values of time domain data.
In [10]:
analytic_signal_base = hilbert(x_baseline.ravel())
envelope_base = np.abs(analytic_signal_base.ravel())

analytic_signal_fault = hilbert(x_fault.ravel())
envelope_fault = np.abs(analytic_signal_fault.ravel())
In [11]:
fig,ax = plt.subplots(figsize=(16,12))
fig.subplots_adjust(hspace=0.5)


plt.subplot(2,2,1)
sns.distplot(envelope_base, label='Baseline')
sns.distplot(envelope_fault,label='Faulty')
plt.xlim([-5,5])
plt.ylabel('Population')
plt.title('Distribution of Complete Data',size=20)
plt.annotate(f'Skewness(base) : {stats.skew(envelope_base):.2f}', (-4.5,1), size=13)
plt.annotate(f'Kurtosis(base) : {stats.kurtosis(envelope_base):.2f}', (-4.5,0.9), size=13)
plt.annotate(f'Skewness(faulty) : {stats.skew(envelope_fault):.2f}', (-4.5,0.8), size=13)
plt.annotate(f'Kurtosis(faulty) : {stats.kurtosis(envelope_fault):.2f}', (-4.5,0.7), size=13)
plt.legend(frameon=False, loc=1)

plt.subplot(2,2,2)
plt.title('Complete envelope data',size=20)
plt.plot(time, envelope_base,label='Baseline')
plt.plot(time[0:len(time)//4], envelope_fault,label='Fault')
plt.ylim([0,16])
plt.xlabel('Time(s)')
plt.legend(frameon=False, loc=1)

plt.subplot(2,2,3)
sns.distplot(envelope_base[0:len(envelope_base)//40], label='Baseline')
sns.distplot(envelope_fault[0:len(envelope_fault)//10],label='Faulty')
plt.xlim([-5,5])
plt.ylabel('Population')
plt.title('Distribution of first 1 second',size=20)
plt.annotate(f'Skewness(base) : {stats.skew(envelope_base[0:len(envelope_base)//40]):.2f}', (-4.5,1), size=13)
plt.annotate(f'Kurtosis(base) : {stats.kurtosis(envelope_base[0:len(envelope_base)//40]):.2f}', (-4.5,0.9), size=13)
plt.annotate(f'Skewness(faulty) : {stats.skew(envelope_fault[0:len(envelope_fault)//10]):.2f}', (-4.5,0.8), size=13)
plt.annotate(f'Kurtosis(faulty) : {stats.kurtosis(envelope_fault[0:len(envelope_fault)//10]):.2f}', (-4.5,0.7), size=13)
plt.legend(frameon=False, loc=1)

plt.subplot(2,2,4)
plt.title('Envelope Signal for 1 second',size=20)
plt.plot(time[0:len(time)//40], envelope_base[0:len(envelope_base)//40],label='Baseline')
plt.plot(time[0:len(time)//40], envelope_fault[0:len(envelope_fault)//10],label='Fault')
plt.ylim([0,16])
plt.xlabel('Time(s)')
plt.legend(frameon=False, loc=1)
Out[11]:
<matplotlib.legend.Legend at 0x145feba10>
In [12]:
fig = make_subplots(rows=3, cols=1, subplot_titles=['Baseline', 'Faulty'], shared_yaxes=True)

#fft baseline
freqs = np.fft.fftfreq(len(envelope_base), 1/f_sample)
idx = np.argsort(freqs)
ps = np.abs(np.fft.fft(envelope_base))**2
fig.add_trace(go.Scatter(x=np.abs(freqs[idx]), y=ps[idx], name = 'Baseline'),
              row=1, col=1)


#fft faulty
freqs = np.fft.fftfreq(len(envelope_fault), 1/f_sample)
idx = np.argsort(freqs)
ps = np.abs(np.fft.fft(envelope_fault))**2
fig.add_trace(go.Scatter(x=np.abs(freqs[idx]), y=ps[idx], name = 'Faulty'), 
              row=2, col=1)
fig.add_trace(go.Scatter(x=np.abs(freqs[idx]/f_bpfo), y=ps[idx], name = 'Faulty'), 
              row=3, col=1)

fig.update_xaxes(title_text="Frequency (Hz)", range=[0, 500], row=1, col=1)
fig.update_xaxes(title_text="Frequency (Hz)", range=[0, 500], row=2, col=1)
fig.update_xaxes(title_text="F_bpfo Multiples", range=[0, 5], row=3, col=1)
fig.update_yaxes(title_text="Power",range=[0, 40e8], row=1, col=1)
fig.update_yaxes(title_text="Power",range=[0, 40e8], row=2, col=1)
fig.update_yaxes(title_text="Power",range=[0, 40e8], row=3, col=1)
fig.update_layout(title_text="Vibration Spectrum : Baseline vs Faulty", height=700,annotations=[
        dict(
            x=103.1,
            y=1.84e9,
            xref="x2",
            yref="y2",
            text="F_bpfo x 1",
            showarrow=True,
            arrowhead=7,
            ax=20,
            ay=-40
        )])

fig.show()
  • Baseline and faulty, both data show rotation frequecy in envelope spectrum. However, Faulty one show additional F_bpfo.

Goal 2 : Using machine learning techniques to distinguish the good bearing from the faulty one.

Unsupervised Machine learning

Principal Components Analysis

In [48]:
seq_len = 1000

X =  np.hstack((x_baseline.ravel(),x_fault.ravel()))
labels = np.hstack((np.zeros(len(x_baseline.ravel())), np.ones(len(x_fault.ravel()))))
y = pd.get_dummies(labels).values
In [49]:
X = X.reshape(-1, seq_len)
labels = labels.reshape(-1, seq_len)
In [50]:
labels = labels.max(axis=1)
labels = ['Baseline' if x==0 else 'Faulty' for x in labels]
In [30]:
X_pca = PCA(n_components=2).fit_transform(X)

t-SNE

In [31]:
X_tsne=TSNE(n_components=2,perplexity=30).fit_transform(X)
In [32]:
# Projection of the data on the reduced dimensionality space

fig, ax = plt.subplots(figsize=(12,5))

ax = plt.subplot(1,2,1)
ax = sns.scatterplot(x=X_pca[:,0],y=X_pca[:,1],hue=labels, marker='o' )
ax.legend([],frameon=False,bbox_to_anchor=(1,1))
ax.set_title('PCA',size=30);

ax = plt.subplot(1,2,2)
ax= sns.scatterplot(x=X_tsne[:,0],y=X_tsne[:,1],hue=labels, marker='o' )
ax.legend(frameon=False,bbox_to_anchor=(1.5,1))
ax.set_title('t-SNE',size=30);
  • PCA and t-SNE both successfully reduced the dimensionality of the data and separate baseline and faulty data into different regions, which can be easily clustered.

Clustering (OPTICS)

In [33]:
op = OPTICS(min_samples=50).fit(X_pca)
labels_pca = op.labels_

op = OPTICS(min_samples=50).fit(X_tsne)
labels_tsne = op.labels_
In [34]:
labels_pca = ['Baseline_Cluster' if x==0 else 'Faulty_Cluster' if x==1 else 'Outlier' for x in labels_pca]
labels_tsne = ['Baseline_Cluster' if x==0 else 'Faulty_Cluster' if x==1 else 'Outlier' for x in labels_tsne]
In [41]:
fig, ax = plt.subplots(figsize=(12,10))
plt.subplots_adjust(hspace=0.4)

ax = plt.subplot(2,2,1)
ax = sns.scatterplot(x=X_pca[:,0],y=X_pca[:,1],hue=labels, marker='o',palette='muted' )
ax.legend([],frameon=False,bbox_to_anchor=(1,1))
ax.set_title('PCA',size=30);

ax = plt.subplot(2,2,2)
ax= sns.scatterplot(x=X_tsne[:,0],y=X_tsne[:,1],hue=labels, marker='o',palette='muted' )
ax.legend(frameon=False,bbox_to_anchor=(1,1))
ax.set_title('t-SNE',size=30);

ax = plt.subplot(2,2,3)
ax = sns.scatterplot(x=X_pca[:,0],y=X_pca[:,1],hue=labels_pca, marker='o',palette='Set1' )
ax.legend([],frameon=False,bbox_to_anchor=(1,1))
ax.set_title('PCA + Clustering',size=30);

ax = plt.subplot(2,2,4)
ax = sns.scatterplot(x=X_tsne[:,0],y=X_tsne[:,1],hue=labels_tsne, marker='o' ,palette='Set1' )
ax.legend(frameon=False,bbox_to_anchor=(1,1))
ax.set_title('t-SNE + Clustering',size=30)
sns.despine(top=True);
  • OPTICS nicely predicts faulty and baselines clusters. Also, detects outliers within data (shown in green).

Supervised Machine learning

Support Vector Machine (SVM) on Reduced Data

In [51]:
#standardizing the input data

sc = StandardScaler()
X_pca = sc.fit_transform(X_pca)
labels = LabelEncoder().fit_transform(labels)
#labels = [1 if x==0 else 0 for x in labels]
In [52]:
X_train, X_test, y_train, y_test = train_test_split(X_pca,labels, test_size=0.2)
X_train, X_val, y_train, y_val = train_test_split(X_train,y_train, test_size=0.2)
In [53]:
fig, ax = plt.subplots(figsize=(12,10))
plt.subplots_adjust(hspace=0.4,wspace=0.4)

plt.subplot(2,2,1)
sns.countplot(labels)
plt.title('Class distribution in total data',size=20)
plt.xticks(range(0,2),['Baseline','Faulty'])

plt.subplot(2,2,2)
sns.countplot(y_train)
plt.title('Class distribution in training data',size=20)
plt.xticks(range(0,2),['Baseline','Faulty'])

plt.subplot(2,2,3)
sns.countplot(y_test)
plt.title('Class distribution in test data',size=20)
plt.xticks(range(0,2),['Baseline','Faulty'])

plt.subplot(2,2,4)
sns.countplot(y_test)
plt.title('Class distribution in validation data',size=20)
plt.xticks(range(0,2),['Baseline','Faulty']);
sns.despine(top=True)
  • There is a clear class imbalance in the data. Faulty data is 4 times lower than baseline data
In [54]:
svm = SVC().fit(X_train,y_train)
y_pred = svm.predict(X_test)
y_pred_val = svm.predict(X_val)
In [55]:
print(f'Accuracy on Test set : {accuracy_score(y_test,y_pred)}')
print(f'F1-macro on Test set : {f1_score(y_test,y_pred)}')
print(f'Precision on Test set : {precision_score(y_test,y_pred)}')
print(f'Recall on Test set :  {recall_score(y_test,y_pred)}')
Accuracy on Test set : 1.0
F1-macro on Test set : 1.0
Precision on Test set : 1.0
Recall on Test set :  1.0
In [56]:
print(f'Accuracy on Validation set : {accuracy_score(y_val,y_pred_val)}')
print(f'F1-macro on Validation set : {f1_score(y_val,y_pred_val)}')
print(f'Precision on Validation set : {precision_score(y_val,y_pred_val)}')
print(f'Recall on Validation set :  {recall_score(y_val,y_pred_val)}')
Accuracy on Validation set : 1.0
F1-macro on Validation set : 1.0
Precision on Validation set : 1.0
Recall on Validation set :  1.0

Confusion matrix

In [57]:
fig, ax = plt.subplots(figsize=(12,3.5))
plt.subplots_adjust(wspace=0.6)

plt.subplot(1,2,1)
conf_mat_test = confusion_matrix(y_test,y_pred)
sns.heatmap(conf_mat_test, annot=True,annot_kws={'size':20},lw=2,fmt='d')
plt.xticks(np.arange(0.5,2.5,1),['Baseline','Faulty'])
plt.yticks(np.arange(0.5,2.5,1),['Baseline','Faulty'],rotation=0)
plt.title('Confusion matrix (test)',size=20);

plt.subplot(1,2,2)
conf_mat_val = confusion_matrix(y_val,y_pred_val)
sns.heatmap(conf_mat_val, annot=True,annot_kws={'size':20},lw=2)
plt.xticks(np.arange(0.5,2.5,1),['Baseline','Faulty'])
plt.yticks(np.arange(0.5,2.5,1),['Baseline','Faulty'],rotation=0)
plt.title('Confusion matrix (val)',size=20);
  • Perfect score in the confusion matrix and both classes are predicted perfectly.

Cross validation

In [303]:
svm = SVC(random_state=27)
accuracies = cross_val_score(svm,X_pca,labels, cv = 5, scoring='f1_macro')
print(f' Mean Accuracy  : {accuracies.mean().round(3)}')
print(f' STD Accuracy : {accuracies.std().round(3)}')
 Mean Accuracy  : 1.0
 STD Accuracy : 0.0
  • Perfect score after the 5 fold cross validation as well and both classes are predicted perfectly.

SVM on complete data

In [304]:
print(f' Mean  : {X.mean().round(3)}')
print(f' STD  : {X.std().round(3)}')
 Mean  : -0.0
 STD  : 1.0
In [305]:
X_train, X_test, y_train, y_test = train_test_split(X,labels, test_size=0.2)
X_train, X_val, y_train, y_val = train_test_split(X_train,y_train, test_size=0.2)
In [306]:
svm = SVC(C=1000,random_state=27).fit(X_train,y_train)
y_pred = svm.predict(X_test)
y_pred_val = svm.predict(X_val)
In [307]:
print(f'Accuracy on Test set : {accuracy_score(y_test,y_pred)}')
print(f'F1-macro on Test set : {f1_score(y_test,y_pred)}')
print(f'Precision on Test set : {precision_score(y_test,y_pred)}')
print(f'Recall on Test set :  {recall_score(y_test,y_pred)}')
Accuracy on Test set : 0.975
F1-macro on Test set : 0.9387755102040816
Precision on Test set : 1.0
Recall on Test set :  0.8846153846153846
In [308]:
print(f'Accuracy on Validation set : {accuracy_score(y_val,y_pred_val)}')
print(f'F1-macro on Validation set : {f1_score(y_val,y_pred_val)}')
print(f'Precision on Validation set : {precision_score(y_val,y_pred_val)}')
print(f'Recall on Validation set :  {recall_score(y_val,y_pred_val)}')
Accuracy on Validation set : 0.96875
F1-macro on Validation set : 0.9090909090909091
Precision on Validation set : 1.0
Recall on Validation set :  0.8333333333333334

Confusion matrix

In [309]:
fig, ax = plt.subplots(figsize=(12,3.5))
plt.subplots_adjust(wspace=0.6)

plt.subplot(1,2,1)
conf_mat_test = confusion_matrix(y_test,y_pred)
sns.heatmap(conf_mat_test, annot=True,annot_kws={'size':20}, fmt='d',lw=2)
plt.xticks(np.arange(0.5,2.5,1),['Baseline','Faulty'])
plt.yticks(np.arange(0.5,2.5,1),['Baseline','Faulty'],rotation=0)
plt.title('Confusion matrix (test)',size=20);

plt.subplot(1,2,2)
conf_mat_val = confusion_matrix(y_val,y_pred_val)
sns.heatmap(conf_mat_val, annot=True,annot_kws={'size':20},lw=2 )
plt.xticks(np.arange(0.5,2.5,1),['Baseline','Faulty'])
plt.yticks(np.arange(0.5,2.5,1),['Baseline','Faulty'],rotation=0)
plt.title('Confusion matrix (val)',size=20);
  • not so perfect score in the confusion matrix and faulty class has been as predicted as baseline in 3,3 occasions in both test and validation data respectively.

Cross validation

In [310]:
accuracies = cross_val_score(svm,X,labels, cv = 5, scoring='f1_macro')
print(f' Mean Accuracy  : {accuracies.mean().round(3)}')
print(f' STD Accuracy : {accuracies.std().round(3)}')
 Mean Accuracy  : 0.964
 STD Accuracy : 0.03
  • 96.4% score after the 10 fold cross validation. Not so bad. However, Dimensionality reduction is clearly better.

oversampling

In [311]:
sm = SVMSMOTE()
X_res, y_res = sm.fit_resample(X_train,y_train)
In [312]:
sns.countplot(y_res)
plt.title('Class distribution after oversampling ',size=20)
plt.xticks(range(0,2),['Baseline','Faulty'])
plt.ylabel('Count',size=15);
In [314]:
svm = SVC(C=1000,random_state=27).fit(X_res,y_res)
y_pred = svm.predict(X_test)
y_pred_val = svm.predict(X_val)
In [315]:
print(f'Accuracy on Test set : {accuracy_score(y_test,y_pred)}')
print(f'F1-macro on Test set : {f1_score(y_test,y_pred)}')
print(f'Precision on Test set : {precision_score(y_test,y_pred)}')
print(f'Recall on Test set :  {recall_score(y_test,y_pred)}')
Accuracy on Test set : 0.9833333333333333
F1-macro on Test set : 0.9600000000000001
Precision on Test set : 1.0
Recall on Test set :  0.9230769230769231
In [316]:
print(f'Accuracy on Validation set : {accuracy_score(y_val,y_pred_val)}')
print(f'F1-macro on Validation set : {f1_score(y_val,y_pred_val)}')
print(f'Precision on Validation set : {precision_score(y_val,y_pred_val)}')
print(f'Recall on Validation set :  {recall_score(y_val,y_pred_val)}')
Accuracy on Validation set : 0.9791666666666666
F1-macro on Validation set : 0.9411764705882353
Precision on Validation set : 1.0
Recall on Validation set :  0.8888888888888888
In [317]:
fig, ax = plt.subplots(figsize=(12,3.5))
plt.subplots_adjust(wspace=0.6)

plt.subplot(1,2,1)
conf_mat_test = confusion_matrix(y_test,y_pred)
sns.heatmap(conf_mat_test, annot=True,annot_kws={'size':20}, fmt='d',lw=2)
plt.xticks(np.arange(0.5,2.5,1),['Baseline','Faulty'])
plt.yticks(np.arange(0.5,2.5,1),['Baseline','Faulty'],rotation=0)
plt.title('Confusion matrix (test)',size=20);

plt.subplot(1,2,2)
conf_mat_val = confusion_matrix(y_val,y_pred_val)
sns.heatmap(conf_mat_val, annot=True,annot_kws={'size':20},lw=2 )
plt.xticks(np.arange(0.5,2.5,1),['Baseline','Faulty'])
plt.yticks(np.arange(0.5,2.5,1),['Baseline','Faulty'],rotation=0)
plt.title('Confusion matrix (val)',size=20);
  • not so perfect score in the confusion matrix and faulty class has been as predicted as baseline in 2,2 occasions in both test and validation data respectively.
In [319]:
accuracies = cross_val_score(svm,X,labels, cv = 5, scoring='f1_macro')
print(f' Mean Accuracy  : {accuracies.mean().round(3)}')
print(f' STD Accuracy : {accuracies.std().round(3)}')
 Mean Accuracy  : 0.964
 STD Accuracy : 0.03
  • 96.4% score after the 5 fold cross validation. Little improvement . Dimensionality reduction is still clearly better.

Recurrents neural networks

In [19]:
X =  np.hstack((x_baseline.ravel(),x_fault.ravel()))
labels = np.hstack((np.zeros(len(x_baseline.ravel())), np.ones(len(x_fault.ravel()))))
y = pd.get_dummies(labels).values
In [15]:
sc = MinMaxScaler()
X=sc.fit_transform(X.reshape(-1,1))
In [20]:
X = X.reshape(-1, seq_len, 1)
y = y.reshape(-1, seq_len, 2)
In [21]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2)
X_train, X_val, y_train, y_val = train_test_split(X_train,y_train, test_size=0.2)
In [24]:
## 2 Layered biLSTM model

n_units=64
batch=128
n_classes=2
inputs = layers.Input(shape=(seq_len, X_train.shape[2]))
outputs = layers.Dense(n_units, activation='relu')(inputs)    
outputs= layers.Bidirectional(layers.LSTM(n_units, return_sequences=True))(outputs)
outputs = layers.Dropout(0.5)(outputs)
outputs = layers.Bidirectional(layers.LSTM(n_units, return_sequences=True))(outputs)
outputs = layers.Dense(n_classes, activation='sigmoid')(outputs)  
model = Model(inputs=inputs, outputs=outputs)
model.compile('adam', loss='binary_crossentropy',metrics=['accuracy','Precision','Recall'])
model.fit(X_train, y_train, 
          batch_size=batch,
          epochs=30,
          callbacks=[ callbacks.ReduceLROnPlateau(), 
                      callbacks.ModelCheckpoint('BiLSTM-N{}-D0.5-B{}.h5'.format(n_units,batch)),
                      callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10),
                      callbacks.CSVLogger(f"BiLSTM-log-Nodes-{n_units}-dropout-0.5-batchsize-{batch}.csv")],
                      validation_data=(X_test, y_test)
                      )
Train on 384 samples, validate on 120 samples
Epoch 1/30
384/384 [==============================] - 17s 43ms/sample - loss: 0.6238 - accuracy: 0.7692 - Precision: 0.7582 - Recall: 0.7907 - val_loss: 0.4983 - val_accuracy: 0.8250 - val_Precision: 0.8250 - val_Recall: 0.8250
Epoch 2/30
384/384 [==============================] - 11s 27ms/sample - loss: 0.4719 - accuracy: 0.7943 - Precision: 0.7943 - Recall: 0.7943 - val_loss: 0.3723 - val_accuracy: 0.8250 - val_Precision: 0.8250 - val_Recall: 0.8250
Epoch 3/30
384/384 [==============================] - 11s 28ms/sample - loss: 0.4281 - accuracy: 0.7943 - Precision: 0.7943 - Recall: 0.7943 - val_loss: 0.3824 - val_accuracy: 0.8250 - val_Precision: 0.8250 - val_Recall: 0.8250
Epoch 4/30
384/384 [==============================] - 11s 29ms/sample - loss: 0.4260 - accuracy: 0.7943 - Precision: 0.7943 - Recall: 0.7943 - val_loss: 0.3537 - val_accuracy: 0.8250 - val_Precision: 0.8250 - val_Recall: 0.8250
Epoch 5/30
384/384 [==============================] - 11s 29ms/sample - loss: 0.3945 - accuracy: 0.7943 - Precision: 0.7943 - Recall: 0.7943 - val_loss: 0.3565 - val_accuracy: 0.8250 - val_Precision: 0.8250 - val_Recall: 0.8250
Epoch 6/30
384/384 [==============================] - 11s 29ms/sample - loss: 0.3937 - accuracy: 0.7943 - Precision: 0.7943 - Recall: 0.7943 - val_loss: 0.3502 - val_accuracy: 0.8250 - val_Precision: 0.8250 - val_Recall: 0.8250
Epoch 7/30
384/384 [==============================] - 11s 29ms/sample - loss: 0.3775 - accuracy: 0.7943 - Precision: 0.7943 - Recall: 0.7944 - val_loss: 0.3214 - val_accuracy: 0.8252 - val_Precision: 0.8251 - val_Recall: 0.8253
Epoch 8/30
384/384 [==============================] - 11s 28ms/sample - loss: 0.3572 - accuracy: 0.7965 - Precision: 0.7959 - Recall: 0.7976 - val_loss: 0.3070 - val_accuracy: 0.8328 - val_Precision: 0.8306 - val_Recall: 0.8362
Epoch 9/30
384/384 [==============================] - 12s 31ms/sample - loss: 0.3442 - accuracy: 0.8164 - Precision: 0.8138 - Recall: 0.8206 - val_loss: 0.2888 - val_accuracy: 0.8726 - val_Precision: 0.8691 - val_Recall: 0.8772
Epoch 10/30
384/384 [==============================] - 15s 38ms/sample - loss: 0.3214 - accuracy: 0.8633 - Precision: 0.8599 - Recall: 0.8681 - val_loss: 0.2709 - val_accuracy: 0.9074 - val_Precision: 0.9051 - val_Recall: 0.9101
Epoch 11/30
384/384 [==============================] - 11s 30ms/sample - loss: 0.2980 - accuracy: 0.8981 - Precision: 0.8953 - Recall: 0.9016 - val_loss: 0.2458 - val_accuracy: 0.9231 - val_Precision: 0.9214 - val_Recall: 0.9252
Epoch 12/30
384/384 [==============================] - 12s 30ms/sample - loss: 0.2667 - accuracy: 0.9140 - Precision: 0.9118 - Recall: 0.9166 - val_loss: 0.2109 - val_accuracy: 0.9341 - val_Precision: 0.9324 - val_Recall: 0.9361
Epoch 13/30
384/384 [==============================] - 11s 30ms/sample - loss: 0.2248 - accuracy: 0.9296 - Precision: 0.9280 - Recall: 0.9315 - val_loss: 0.1624 - val_accuracy: 0.9496 - val_Precision: 0.9487 - val_Recall: 0.9507
Epoch 14/30
384/384 [==============================] - 11s 30ms/sample - loss: 0.1661 - accuracy: 0.9448 - Precision: 0.9436 - Recall: 0.9462 - val_loss: 0.1081 - val_accuracy: 0.9632 - val_Precision: 0.9621 - val_Recall: 0.9644
Epoch 15/30
384/384 [==============================] - 11s 30ms/sample - loss: 0.1057 - accuracy: 0.9649 - Precision: 0.9634 - Recall: 0.9666 - val_loss: 0.0725 - val_accuracy: 0.9688 - val_Precision: 0.9650 - val_Recall: 0.9728
Epoch 16/30
384/384 [==============================] - 12s 31ms/sample - loss: 0.0611 - accuracy: 0.9765 - Precision: 0.9729 - Recall: 0.9802 - val_loss: 0.0374 - val_accuracy: 0.9874 - val_Precision: 0.9852 - val_Recall: 0.9896
Epoch 17/30
384/384 [==============================] - 12s 32ms/sample - loss: 0.0372 - accuracy: 0.9891 - Precision: 0.9872 - Recall: 0.9911 - val_loss: 0.0239 - val_accuracy: 0.9939 - val_Precision: 0.9934 - val_Recall: 0.9945
Epoch 18/30
384/384 [==============================] - 11s 29ms/sample - loss: 0.0332 - accuracy: 0.9925 - Precision: 0.9922 - Recall: 0.9928 - val_loss: 0.0217 - val_accuracy: 0.9955 - val_Precision: 0.9955 - val_Recall: 0.9955
Epoch 19/30
384/384 [==============================] - 13s 33ms/sample - loss: 0.0140 - accuracy: 0.9981 - Precision: 0.9981 - Recall: 0.9982 - val_loss: 0.0086 - val_accuracy: 0.9995 - val_Precision: 0.9995 - val_Recall: 0.9995
Epoch 20/30
384/384 [==============================] - 11s 30ms/sample - loss: 0.0091 - accuracy: 0.9995 - Precision: 0.9995 - Recall: 0.9994 - val_loss: 0.0057 - val_accuracy: 1.0000 - val_Precision: 1.0000 - val_Recall: 1.0000
Epoch 21/30
384/384 [==============================] - 11s 29ms/sample - loss: 0.0061 - accuracy: 1.0000 - Precision: 1.0000 - Recall: 1.0000 - val_loss: 0.0041 - val_accuracy: 1.0000 - val_Precision: 1.0000 - val_Recall: 1.0000
Epoch 22/30
384/384 [==============================] - 11s 27ms/sample - loss: 0.0044 - accuracy: 1.0000 - Precision: 1.0000 - Recall: 1.0000 - val_loss: 0.0030 - val_accuracy: 1.0000 - val_Precision: 1.0000 - val_Recall: 1.0000
Epoch 23/30
384/384 [==============================] - 11s 28ms/sample - loss: 0.0032 - accuracy: 1.0000 - Precision: 1.0000 - Recall: 1.0000 - val_loss: 0.0023 - val_accuracy: 1.0000 - val_Precision: 1.0000 - val_Recall: 1.0000
Epoch 24/30
384/384 [==============================] - 11s 29ms/sample - loss: 0.0028 - accuracy: 0.9998 - Precision: 0.9998 - Recall: 0.9999 - val_loss: 0.0018 - val_accuracy: 1.0000 - val_Precision: 1.0000 - val_Recall: 1.0000
Epoch 25/30
384/384 [==============================] - 11s 30ms/sample - loss: 0.0020 - accuracy: 1.0000 - Precision: 1.0000 - Recall: 1.0000 - val_loss: 0.0018 - val_accuracy: 1.0000 - val_Precision: 1.0000 - val_Recall: 1.0000
Epoch 26/30
384/384 [==============================] - 12s 31ms/sample - loss: 0.0189 - accuracy: 0.9971 - Precision: 0.9971 - Recall: 0.9971 - val_loss: 0.0013 - val_accuracy: 1.0000 - val_Precision: 1.0000 - val_Recall: 1.0000
Epoch 27/30
384/384 [==============================] - 12s 31ms/sample - loss: 0.0088 - accuracy: 0.9987 - Precision: 0.9987 - Recall: 0.9987 - val_loss: 0.0014 - val_accuracy: 1.0000 - val_Precision: 1.0000 - val_Recall: 1.0000
Epoch 28/30
384/384 [==============================] - 12s 30ms/sample - loss: 0.0040 - accuracy: 0.9989 - Precision: 0.9989 - Recall: 0.9989 - val_loss: 0.0016 - val_accuracy: 1.0000 - val_Precision: 1.0000 - val_Recall: 1.0000
Epoch 29/30
384/384 [==============================] - 11s 30ms/sample - loss: 0.0045 - accuracy: 0.9988 - Precision: 0.9987 - Recall: 0.9988 - val_loss: 0.0011 - val_accuracy: 1.0000 - val_Precision: 1.0000 - val_Recall: 1.0000
Epoch 30/30
384/384 [==============================] - 12s 30ms/sample - loss: 0.0012 - accuracy: 1.0000 - Precision: 1.0000 - Recall: 1.0000 - val_loss: 9.5748e-04 - val_accuracy: 1.0000 - val_Precision: 1.0000 - val_Recall: 1.0000
Out[24]:
<tensorflow.python.keras.callbacks.History at 0x14ef4ac90>
In [25]:
model = load_model('BiLSTM-N64-D0.5-B128.h5')
In [28]:
history =  pd.read_csv('BiLSTM-log-Nodes-64-dropout-0.5-batchsize-128.csv')
history.head(10).round(2).style.background_gradient(cmap='Blues')
Out[28]:
epoch Precision Recall accuracy loss lr val_Precision val_Recall val_accuracy val_loss
0 0 0.760000 0.790000 0.770000 0.620000 0.000000 0.820000 0.820000 0.820000 0.500000
1 1 0.790000 0.790000 0.790000 0.470000 0.000000 0.820000 0.820000 0.820000 0.370000
2 2 0.790000 0.790000 0.790000 0.430000 0.000000 0.820000 0.820000 0.820000 0.380000
3 3 0.790000 0.790000 0.790000 0.430000 0.000000 0.820000 0.820000 0.820000 0.350000
4 4 0.790000 0.790000 0.790000 0.390000 0.000000 0.820000 0.820000 0.820000 0.360000
5 5 0.790000 0.790000 0.790000 0.390000 0.000000 0.820000 0.820000 0.820000 0.350000
6 6 0.790000 0.790000 0.790000 0.380000 0.000000 0.830000 0.830000 0.830000 0.320000
7 7 0.800000 0.800000 0.800000 0.360000 0.000000 0.830000 0.840000 0.830000 0.310000
8 8 0.810000 0.820000 0.820000 0.340000 0.000000 0.870000 0.880000 0.870000 0.290000
9 9 0.860000 0.870000 0.860000 0.320000 0.000000 0.910000 0.910000 0.910000 0.270000

Convergence test

In [32]:
fig, ax = plt.subplots(figsize=(15,10))
fig.subplots_adjust(hspace=0.3)

ax = plt.subplot(2,2,1)
history[['epoch','loss','val_loss']].plot(x='epoch', ax = ax, marker = 'o')
plt.legend(['Training Loss','Validation Loss'],frameon=False)

ax2 = plt.subplot(2,2,2)
history[['epoch','accuracy','val_accuracy']].plot(x='epoch', ax = ax2, marker= 'o')
plt.legend(['Training Accuracy','Validation Accuracy'],frameon=False)

ax3 = plt.subplot(2,2,3)
history[['epoch','Precision','val_Precision']].plot(x='epoch', ax = ax3, marker = 'o')
plt.legend(['Training Precision','Validation Precision'],frameon=False)

ax4 = plt.subplot(2,2,4)
history[['epoch','Recall','val_Recall']].plot(x='epoch', ax=ax4, marker = 'o')
plt.legend(['Training recall','Validation recall'],frameon=False);
  • model converges nicely and doesn't overfit (i.e. val loss > training loss)
In [33]:
y_pred = model.predict(X_val)
y_pred = y_pred.argmax(axis=-1).ravel()
y_val = y_val.argmax(-1).ravel()
In [34]:
print(f'Accuracy on Validation set : {accuracy_score(y_val,y_pred)}')
print(f'F1-macro on Validation set : {f1_score(y_val,y_pred)}')
print(f'Precision on Validation set : {precision_score(y_val,y_pred)}')
print(f'Recall on Validation set :  {recall_score(y_val,y_pred)}')
Accuracy on Validation set : 1.0
F1-macro on Validation set : 1.0
Precision on Validation set : 1.0
Recall on Validation set :  1.0
In [35]:
conf_mat_val = confusion_matrix(y_val,y_pred)
_, counts = np.unique(y_val,return_counts=True)
conf_mat = (conf_mat_val/counts)*100
sns.heatmap(conf_mat.round(2), annot=True, fmt='.2f',lw=2, annot_kws={'size':20},cbar_kws={'label':'Class accuracy % '})
plt.xticks(np.arange(0.5,2.5,1),['Faulty','Baseline'])
plt.yticks(np.arange(0.5,2.5,1),['Faulty','Baseline'],rotation=0)
plt.title('Confusion matrix (val)',size=20);
In [ ]: